library(tidyverse)
library(ade4) # Coinertia
library(vegan) # Rarefaction
library(ggrepel)
library(metacoder)
## Warning: le package 'metacoder' a été compilé avec la version R 4.3.3
main_theme = theme_minimal()+
theme(line = element_blank(),
axis.line = element_line(colour = "black"),
panel.border = element_blank(),
axis.ticks = element_line(colour = "black"),
axis.text.x = element_text(colour = "black", size=22, face="italic", angle = 45, vjust = 1, hjust = 1),
axis.text.y = element_text(colour = "black", size=22, face="italic"),
legend.title = element_text(colour = "black", size=20,
hjust =0.5),
legend.text = element_text(colour = "black", size=18),
axis.title= element_text(size=28),
strip.text = element_text(colour = "black", size=15, face ="italic"))
metab1 = "data/metabarcoding_vs_metagenomics/oracle_metaBt0_16S_samples_run_20190715_kraken2_assignment_genuses.tsv"
#metab2 = "data/metabarcoding_vs_metagenomics/oracle_metaBt0_16S_samples_run_20200106_kraken2_assignment_genuses.tsv"
metag1 = "data/metabarcoding_vs_metagenomics/oracle_metaGt0_SAMA_12_samples_kraken2_SSU_assignment_genuses.tsv"
#metag2 = "data/metabarcoding_vs_metagenomics/oracle_metaGt0_SAMA_21_1_samples_kraken2_SSU_assignment_genuses.tsv"
#metag3 = "data/metabarcoding_vs_metagenomics/oracle_metaGt0_SAMA_21_2_samples_kraken2_SSU_assignment_genuses.tsv"
taxonomic_levels <- c("kingdom", "phylum", "class", "order", "family",
"genus", "species")
## ---- Arrange metaB ----
# Select data from bracken method, and rename columns
select_and_rename_cols = function(tab, method = "G"){
metag1%>%
read_tsv(col_names = T, skip = 1)%>%
select(which(str_detect(colnames(.),"bracken_genuses")), "#OTU ID", "taxonomy")%>%
rename_with( # rename colnames
.%>%
str_remove( "_bracken_genuses")%>%
str_replace_all("^OR-", "BU_") %>%
str_replace_all("-", "_")%>%
str_remove("_S\\d+")%>%
str_replace("((?<!.)T_)(\\d)", "CONT_BU_PCR_\\2")%>%
str_replace("(BU_T_extr)", "CONT_BU_ext")%>%
str_replace("#OTU ID", "OTU")%>%
str_replace("(BU_[A-Z]+_)(\\d{1})(_R\\d+)", "\\10\\2\\3"
))%>% # add 0 before 1-Digit Numbers of soil code
rename_with(~ paste0(., "_", method), contains("BU"))%>% # add B or G at the end of soil code depending on the method
separate_wider_delim(taxonomy, names = taxonomic_levels, delim = "; ", cols_remove = F)%>% # Separate taxonomy colums in 6 colums of taxonomic levels
filter(kingdom == "k__Bacteria")
}
Compute the total number of reads for each OTU present in control samples. That sum is then substracted from all occurrences of that OTU in true samples. The rationale is as follows:
Control samples can be eliminated from the statistical analysis after the substraction (all OTUs present in control samples have been zeroed out).
.%>%
#select(!ends_with(run_rm))%>%
replace(. == 0, NA) %>%
pivot_longer(starts_with("BU"), names_to = "samples", values_to = "reads") %>%
filter(!is.na(reads)) %>%
#{{merge control samples}}
mutate( n = rowSums(across(starts_with("CONT")), na.rm = T))%>%
#{{substract abundance of control samples}}
mutate(reads = case_when(
is.na(n) ~ reads,
n > reads ~ 0,
TRUE ~ reads - n)) %>%
select(-n,-starts_with("CONT"))%>%
pivot_wider(values_from = reads, names_from = samples, values_fill = 0) -> decontaminate
metab1%>%
select_and_rename_cols("B")%>%
decontaminate -> metab1_decont_table
## Rows: 4826 Columns: 318
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): taxonomy
## dbl (317): #OTU ID, BU-RT-01_R1, BU-RT-01_R1_bracken_genuses, BU-RT-01_R2, B...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
metag1%>%
select_and_rename_cols("G")%>%
decontaminate -> metag1_decont_table
## Rows: 4826 Columns: 318
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): taxonomy
## dbl (317): #OTU ID, BU-RT-01_R1, BU-RT-01_R1_bracken_genuses, BU-RT-01_R2, B...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
metag1_decont_table%>%
full_join( metab1_decont_table, by = join_by(OTU == OTU, kingdom==kingdom, phylum == phylum,
class == class, order == order, family == family,
genus == genus, species == species, taxonomy == taxonomy))%>%
mutate(across(where(is.numeric), ~ replace(., is.na(.), 0))) -> meta_g_b_decond_table
metab1_decont_table%>%
select(starts_with("BU"))%>%rowSums
## [1] 28932 3369 30 8186 6592 57 209 91909 44351
## [10] 5604 5758 812 765 68 209 19 7587 7951
## [19] 247 25 1188 50 180 54 130 49 17893
## [28] 77 90 19 147 161 14638 1452 22 167
## [37] 2577 90 334 317 1933 34 297 22 326
## [46] 106 629 4415 61 2383 1275 218 280 80
## [55] 36 62087 46571 3258 1308 73 51053 64149 1471
## [64] 102 36 83 489030 57755 37880 407873 794082 10
## [73] 22892 5428983 198 50 16726 3576 3470 7167 18
## [82] 13771 314935 1339 846 1017 70 20259 21683 391
## [91] 9 85 80 19 48 4833 140 3044 607762
## [100] 59109 14636 6176 103 365 44 10767 7671 2602
## [109] 350 56 942 148 155 73164 2611 1929676 1041241
## [118] 2678267 2994 19 804644 1462 116227 1347772 2651 1094662
## [127] 1199 5239 37 26 14507 23 13682 8104 972
## [136] 66 14745 66 461 28 837 2327 125 1940
## [145] 10278 12353 1104 54 16 10053 27 16 743136
## [154] 178 279 169 4574 1012 1023 103 58 40863
## [163] 157 61 930 80 125 129 29 680 7606
## [172] 3588 4357 237 316 511 136 1288 39 13
## [181] 738 4935 19 138 19 159889 107040 494 991
## [190] 969 25 637 236 113 40 101 22404 221
## [199] 248 1744 4269 6702 2594 39880 4619 81 7043
## [208] 15 218 43 256 878 533 60 86 5233
## [217] 23902 10363 8733 3190 6354 16 60 474 7670
## [226] 936 873 3994 688 7124 372 135 29 115
## [235] 5880 53 10 4166 753 10 23445 15 160
## [244] 8233 9 196282 85 3333 32 77 11821 1333
## [253] 18 56 17 75255 1052 161 2441 104 20
## [262] 44 12 90 165 24 21 25 10 201
Is it normal that some OTU have 0 reads ?
rarefaction.func = function(df,rar_sample, rarcur = T){
if(rarcur){ # {{ if rarefaction curve needed}}
metab1_decont_table%>%
select(starts_with("BU"))%>%
t()%>%rarecurve(step = 20, sample = rar_sample, col = "blue", cex = 0.6)
}
# {{rarefaction}}
df%>%
select(starts_with("BU"))%>%
t()%>%
rrarefy(rar_sample)%>%
t()%>%
bind_cols(
df%>%select(!starts_with("BU"))
)-> df_rarefy
return(df_rarefy)
}
meta_g_b_decond_table%>%
select(starts_with("BU"))%>%
t()%>%rowSums%>%sort%>%log10%>%plot(1:length(.),.)
Gap around 10^3.8 => rarefy at this value
meta_g_b_decond_table%>%rarefaction.func(rar_sample =round(10^3.8), rarcur = T) -> meta_g_b_decond_table_rare
## Warning in rarecurve(., step = 20, sample = rar_sample, col = "blue", cex =
## 0.6): most observed count data have counts 1, but smallest count is 5
## Warning in rrarefy(., rar_sample): function should be used for observed counts,
## but smallest count is 5
meta_g_b_decond_table_rare$BU_TS_15
## Warning: Unknown or uninitialised column: `BU_TS_15`.
## NULL
meta_g_b_decond_table$BU_TS_15
## Warning: Unknown or uninitialised column: `BU_TS_15`.
## NULL
##Coinertia
meta_g_b_decond_table_rare%>%
column_to_rownames("OTU")%>%
select(ends_with("R1_B"))%>%
select(order(colnames(.)))%>%
decostand(method = "hellinger")%>%
dudi.pca( scale = TRUE, scan = FALSE, nf = 3) -> dudi_b_r1
meta_g_b_decond_table_rare%>%
column_to_rownames("OTU")%>%
select(ends_with("R2_B"))%>%
select(order(colnames(.)))%>%
decostand(method = "hellinger")%>%
dudi.pca( scale = TRUE, scan = FALSE, nf = 3) -> dudi_b_r2
meta_g_b_decond_table_rare%>%
column_to_rownames("OTU")%>%
select(ends_with("R1_G"))%>%
select(order(colnames(.)))%>%
decostand(method = "hellinger")%>%
dudi.pca( scale = TRUE, scan = FALSE, nf = 3) -> dudi_g_r1
meta_g_b_decond_table_rare%>%
column_to_rownames("OTU")%>%
select(ends_with("R2_G"))%>%
select(order(colnames(.)))%>%
decostand(method = "hellinger")%>%
dudi.pca( scale = TRUE, scan = FALSE, nf = 3) -> dudi_g_r2
# Disjoint R1 and R2 and decontaminate
I’m doing the Hellinger transformation on the matrix (OTU x site). Usually the transformation (and the coinertia by extension) is done on the transpose of this matrix (site x species/OUT).It is because most of the time we are interested in the difference of species composition in sites. However here we are interested in the species detection difference, in other word we are looking for difference of species abundances in sites
/! Maybe do separate or merge Hellinger transformation on the 2 df ?
### ---- better graph representation for coinertia
coin.graph = function(coin_obj,meta_obj, brin, method){
# {{recover coordinate in the ordination space of the 2 method to compaire}}
cbind(coin_obj$mX,coin_obj$mY) -> df_coin_pos
colnames(df_coin_pos)=c("metho1_x", "metho1_y", "metho2_x", "metho2_y")
df_coin_pos%>%
rownames_to_column("OTU")%>%
mutate(OTU = as.numeric(OTU))%>%
# {{recover OTU and genus information}}
left_join(meta_obj%>%select(OTU, genus), by = "OTU")%>%
# {{Find taxa with the higher and lower diff (lower and higer distance in the ordination space between metho1(x,y) and metho2(x,y) )}}
mutate(dist = sqrt(rowSums((coin1$mX-coin1$mY)^2)))%>%
arrange(desc(dist)) %>%
slice(c(1:10, (n() - 9):n()))%>%
# {{add a colums to facet_wrap}}
mutate(diff = c(rep("Strong",10 ), rep("Weak", 10)))%>%
ggplot()+
facet_wrap(~diff)+
geom_label_repel(aes(x= metho1_x, y = metho1_y ,label = genus))+
geom_segment(aes(x = metho1_x, y = metho1_y, xend = metho2_x, yend = metho2_y),
arrow = arrow(length = unit(0.3, "cm"), type = "closed"), cex =1)+
labs(x="X",y = "Y", title = paste("Genera estimations differences between", brin[1],method[1], "(arrow tail) and", brin[2],method[2], "(arrow head) across 80 samples"))+
main_theme
}
coin1 <- coinertia(dudi_b_r1,dudi_b_r2, scan = FALSE, nf = 2)
summary(coin1)
## Coinertia analysis
##
## Class: coinertia dudi
## Call: coinertia(dudiX = dudi_b_r1, dudiY = dudi_b_r2, scannf = FALSE,
## nf = 2)
##
## Total inertia: 331.3
##
## Eigenvalues:
## Ax1 Ax2 Ax3 Ax4 Ax5
## 304.687 9.059 2.714 1.264 1.013
##
## Projected inertia (%):
## Ax1 Ax2 Ax3 Ax4 Ax5
## 91.9565 2.7340 0.8191 0.3816 0.3058
##
## Cumulative projected inertia (%):
## Ax1 Ax1:2 Ax1:3 Ax1:4 Ax1:5
## 91.96 94.69 95.51 95.89 96.20
##
## (Only 5 dimensions (out of 77) are shown)
##
## Eigenvalues decomposition:
## eig covar sdX sdY corr
## 1 304.686922 17.455283 4.762115 4.667198 0.7853636
## 2 9.058705 3.009768 2.063545 1.627278 0.8963082
##
## Inertia & coinertia X (dudi_b_r1):
## inertia max ratio
## 1 22.67774 22.79292 0.9949467
## 12 26.93596 27.08917 0.9943440
##
## Inertia & coinertia Y (dudi_b_r2):
## inertia max ratio
## 1 21.78274 21.97014 0.9914702
## 12 24.43078 24.68268 0.9897941
##
## RV:
## 0.5854531
plot(coin1)
coin.graph(coin1,meta_obj= meta_g_b_decond_table_rare, brin = c("R1","R2"), method = c("metaB", "metaB"))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: ggrepel: 10 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
coin2 <- coinertia(dudi_g_r1,dudi_g_r2, scan = FALSE, nf = 2)
summary(coin2)
## Coinertia analysis
##
## Class: coinertia dudi
## Call: coinertia(dudiX = dudi_g_r1, dudiY = dudi_g_r2, scannf = FALSE,
## nf = 2)
##
## Total inertia: 354.7
##
## Eigenvalues:
## Ax1 Ax2 Ax3 Ax4 Ax5
## 327.938 9.690 2.644 1.307 1.134
##
## Projected inertia (%):
## Ax1 Ax2 Ax3 Ax4 Ax5
## 92.4675 2.7324 0.7456 0.3685 0.3198
##
## Cumulative projected inertia (%):
## Ax1 Ax1:2 Ax1:3 Ax1:4 Ax1:5
## 92.47 95.20 95.95 96.31 96.63
##
## (Only 5 dimensions (out of 77) are shown)
##
## Eigenvalues decomposition:
## eig covar sdX sdY corr
## 1 327.938234 18.109065 4.778757 4.742291 0.7990847
## 2 9.690353 3.112933 2.057904 1.700255 0.8896736
##
## Inertia & coinertia X (dudi_g_r1):
## inertia max ratio
## 1 22.83652 22.93899 0.9955330
## 12 27.07149 27.23622 0.9939518
##
## Inertia & coinertia Y (dudi_g_r2):
## inertia max ratio
## 1 22.48933 22.70778 0.9903798
## 12 25.38020 25.66096 0.9890585
##
## RV:
## 0.6054525
plot(coin2)
coin.graph(coin2,meta_obj= meta_g_b_decond_table_rare, brin = c("R1","R2"), method = c("metaG", "metaG"))
## Warning: ggrepel: 10 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
coin3 <- coinertia(dudi_b_r1,dudi_g_r1, scan = FALSE, nf = 2)
summary(coin3)
## Coinertia analysis
##
## Class: coinertia dudi
## Call: coinertia(dudiX = dudi_b_r1, dudiY = dudi_g_r1, scannf = FALSE,
## nf = 2)
##
## Total inertia: 590.6
##
## Eigenvalues:
## Ax1 Ax2 Ax3 Ax4 Ax5
## 522.582 18.326 5.195 3.310 2.431
##
## Projected inertia (%):
## Ax1 Ax2 Ax3 Ax4 Ax5
## 88.4900 3.1032 0.8796 0.5606 0.4116
##
## Cumulative projected inertia (%):
## Ax1 Ax1:2 Ax1:3 Ax1:4 Ax1:5
## 88.49 91.59 92.47 93.03 93.44
##
## (Only 5 dimensions (out of 79) are shown)
##
## Eigenvalues decomposition:
## eig covar sdX sdY corr
## 1 522.58164 22.860045 4.774173 4.789446 0.9997551
## 2 18.32606 4.280895 2.072342 2.072574 0.9966972
##
## Inertia & coinertia X (dudi_b_r1):
## inertia max ratio
## 1 22.79273 22.79292 0.9999917
## 12 27.08733 27.08917 0.9999318
##
## Inertia & coinertia Y (dudi_g_r1):
## inertia max ratio
## 1 22.93879 22.93899 0.9999914
## 12 27.23436 27.23622 0.9999315
##
## RV:
## 0.9967147
plot(coin3)
coin.graph(coin3,meta_obj= meta_g_b_decond_table_rare, brin = c("R1","R1"), method = c("metaB", "metaG"))
## Warning: ggrepel: 10 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
coin4 <- coinertia(dudi_b_r2,dudi_g_r2, scan = FALSE, nf = 2)
summary(coin4)
## Coinertia analysis
##
## Class: coinertia dudi
## Call: coinertia(dudiX = dudi_b_r2, dudiY = dudi_g_r2, scannf = FALSE,
## nf = 2)
##
## Total inertia: 518.2
##
## Eigenvalues:
## Ax1 Ax2 Ax3 Ax4 Ax5
## 489.144 6.432 2.634 1.519 1.189
##
## Projected inertia (%):
## Ax1 Ax2 Ax3 Ax4 Ax5
## 94.4003 1.2414 0.5084 0.2932 0.2295
##
## Cumulative projected inertia (%):
## Ax1 Ax1:2 Ax1:3 Ax1:4 Ax1:5
## 94.40 95.64 96.15 96.44 96.67
##
## (Only 5 dimensions (out of 77) are shown)
##
## Eigenvalues decomposition:
## eig covar sdX sdY corr
## 1 489.144172 22.116604 4.686695 4.764793 0.9903933
## 2 6.432355 2.536209 1.620178 1.699920 0.9208603
##
## Inertia & coinertia X (dudi_b_r2):
## inertia max ratio
## 1 21.96511 21.97014 0.9997710
## 12 24.59009 24.68268 0.9962486
##
## Inertia & coinertia Y (dudi_g_r2):
## inertia max ratio
## 1 22.70325 22.70778 0.9998006
## 12 25.59298 25.66096 0.9973507
##
## RV:
## 0.9260847
plot(coin4)
coin.graph(coin4,meta_obj= meta_g_b_decond_table_rare, brin = c("R2","R2"), method = c("metaB", "metaG"))
## Warning: ggrepel: 8 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
make.tax.tree.comp = function(df,method){
meta_g_b_decond_table_rare %>%
select(OTU, kingdom, phylum, class, order, family, genus, species, taxonomy, ends_with(method[1]), ends_with(method[2]))%>%
# {{standardization of the table produce by the "method 1" (first method put in the vector)}}
mutate(across(ends_with(method[1]), ~ sqrt(. / rowSums(across(ends_with(method[1])))), .names = "hellinger_{col}"),
# {{standardization of the table produce by the "method 2" (second method put in the vector)}}
across(ends_with(method[2]), ~ sqrt(. / rowSums(across(ends_with(method[2])))), .names = "hellinger_{col}"),.keep = "unused")%>%
# {{remove species that don't appear in the 2 method that we compare in this chunk}}
mutate(total = rowSums(across(starts_with("hellinger_") )))%>%
filter(total !=0 )%>%
# {{create metacoder object}}
metacoder::parse_tax_data(class_cols = "taxonomy", # The column in the input table
class_sep = "; ",
class_regex = "^([a-z]{0,1})_{0,2}(.*)$",
class_key = c("tax_rank" = "taxon_rank", "name" = "taxon_name")) -> obj
# {{compute abudance (standadize by hellinger) per taxon for the 2 methode}}
obj$data$tax_abund <- metacoder::calc_taxon_abund(obj, "tax_data",
cols = startsWith(colnames(obj$data$tax_data),"hellinger"))
obj$data$tax_abund_group <- metacoder::calc_taxon_abund(obj, "tax_data",
cols = startsWith(colnames(obj$data$tax_data),"hellinger"),
groups = str_extract(str_subset(colnames(obj$data$tax_data), "_R\\d_[A-Z+]"), "R\\d_[A-Z+]"))
obj$data$diff_table = metacoder::compare_groups(obj, "tax_abund",
cols = (startsWith(colnames(obj$data$tax_abund),"hellinger")),
groups = str_extract(str_subset(colnames(obj$data$tax_abund), "_R\\d_[A-Z+]"), "R\\d_[A-Z+]"))
obj = mutate_obs(obj, "diff_table",
wilcox_p_value = p.adjust(wilcox_p_value, method = "fdr"))
# {{Ns. set to 0}}
obj$data$diff_table%>%
mutate(median_diff = case_when(wilcox_p_value > 0.05 ~0,
.default = median_diff),
log2_median_ratio = case_when(wilcox_p_value > 0.05 ~0,
.default = median_diff))
return(obj)
}
meta_g_b_decond_table_rare%>%
make.tax.tree.comp(method = c("R1_B","R2_B")) -> obj1
## Summing per-taxon counts from 158 columns for 627 taxa
## Summing per-taxon counts from 158 columns in 2 groups for 627 taxa
obj1%>%
filter_taxa(taxon_ranks == "o", supertaxa = TRUE)%>%
heat_tree(node_label = gsub(pattern = "\\[|\\]", replacement = "", taxon_names),
node_size = R1_B+R2_B,
node_color = R1_B-R2_B,
node_color_range = c("blue", "gray", "red"),
node_color_interval = c(-max(abs(R1_B-R2_B)), max(abs(R1_B-R2_B))),
node_color_axis_label = "Standardized abudance \n difference across site (R1_B-R2_B)",
node_size_axis_label = "OTU abudance sum per taxon\n (standard hellinger)",
layout = "davidson-harel", initial_layout = "reingold-tilford")
Here we see that in the phylum Protobacteia the class Gammaprotobacteria is mostly detected by R1 and Alphaprotobacteria is mostly detected by R1. Fimicutes and Actinobacteria are better detected by R1
obj1%>%
filter_taxa(taxon_ranks == "o", supertaxa = TRUE)%>%
heat_tree(node_label = taxon_names,
node_size = n_obs, # number of OTUs
node_color = log2_median_ratio, # difference between groups
node_color_interval = c(-10, 10), # symmetric interval
node_color_range = c("cyan", "gray", "magenta"), # diverging colors
node_size_axis_label = "OTU count",
node_color_axis_label = "Log 2 ratio of median counts",
layout = "davidson-harel", initial_layout = "reingold-tilford",
title = paste(unique(treatment_1) ,"vs", unique(treatment_2)))
obj1 %>%
mutate_obs("cleaned_names", gsub(taxon_names, pattern = "\\[|\\]", replacement = "")) %>%
metacoder::filter_taxa(grepl(cleaned_names, pattern = "^[a-zA-Z]+$")) %>%
heat_tree(node_label = cleaned_names,
node_size = n_obs, # number of OTUs
node_color = log2_median_ratio, # difference between groups
node_color_interval = c(-10, 10), # symmetric interval
node_color_range = c("cyan", "gray", "magenta"), # diverging colors
node_size_axis_label = "OTU count",
node_color_axis_label = "Log 2 ratio of median counts",
layout = "da", initial_layout = "re", # good layout for large trees
title = paste(unique(treatment_1) ,"vs", unique(treatment_2)))
## Adding a new "character" vector of length 627.
obj1%>%
filter_taxa(taxon_ranks == "o", supertaxa = TRUE)%>%
heat_tree(node_label = taxon_names,
node_size = n_obs, # number of OTUs
node_color = median_diff, # difference between groups
node_color_interval = c(-max(abs(median_diff)), max(abs(median_diff))), # symmetric interval
node_color_range = c("cyan", "gray", "magenta"), # diverging colors
node_size_axis_label = "OTU count",
node_color_axis_label = "Log 2 ratio of median counts",
layout = "davidson-harel", initial_layout = "reingold-tilford",
title = paste(unique(treatment_1) ,"vs", unique(treatment_2)))
meta_g_b_decond_table_rare%>%
make.tax.tree.comp(method = c("R1_G","R2_G")) -> obj2
## Summing per-taxon counts from 158 columns for 599 taxa
## Summing per-taxon counts from 158 columns in 2 groups for 599 taxa
obj2%>%
filter_taxa(taxon_ranks == "o", supertaxa = TRUE)%>%
heat_tree(node_label = gsub(pattern = "\\[|\\]", replacement = "", taxon_names),
node_size = R1_G+R2_G,
node_color = R1_G-R2_G,
node_color_range = c("blue", "gray", "red"),
node_color_interval = c(-max(abs(R1_G-R2_G)), max(abs(R1_G-R2_G))),
node_color_axis_label = "Standardized abudance \n difference across site (R1_G-R2_G)",
node_size_axis_label = "OTU abudance sum per taxon\n (standard hellinger)",
layout = "davidson-harel", initial_layout = "reingold-tilford")
Different pattern for metaG than metaB here in phylum Protobacteia the class Gammaprotobacteria and Alphaprotobacteria are both more detected by R1. Fimicutes and Actinobacteria are also better detected by R1
obj2%>%
filter_taxa(taxon_ranks == "o", supertaxa = TRUE)%>%
heat_tree(node_label = taxon_names,
node_size = n_obs, # number of OTUs
node_color = log2_median_ratio, # difference between groups
node_color_interval = c(-10, 10), # symmetric interval
node_color_range = c("cyan", "gray", "magenta"), # diverging colors
node_size_axis_label = "OTU count",
node_color_axis_label = "Log 2 ratio of median counts",
layout = "davidson-harel", initial_layout = "reingold-tilford",
title = paste(unique(treatment_1) ,"vs", unique(treatment_2)))
obj2 %>%
mutate_obs("cleaned_names", gsub(taxon_names, pattern = "\\[|\\]", replacement = "")) %>%
metacoder::filter_taxa(grepl(cleaned_names, pattern = "^[a-zA-Z]+$")) %>%
heat_tree(node_label = cleaned_names,
node_size = n_obs, # number of OTUs
node_color = log2_median_ratio, # difference between groups
node_color_interval = c(-10, 10), # symmetric interval
node_color_range = c("cyan", "gray", "magenta"), # diverging colors
node_size_axis_label = "OTU count",
node_color_axis_label = "Log 2 ratio of median counts",
layout = "da", initial_layout = "re", # good layout for large trees
title = paste(unique(treatment_1) ,"vs", unique(treatment_2)))
## Adding a new "character" vector of length 599.
obj2%>%
filter_taxa(taxon_ranks == "o", supertaxa = TRUE)%>%
heat_tree(node_label = taxon_names,
node_size = n_obs, # number of OTUs
node_color = median_diff, # difference between groups
node_color_interval = c(-max(abs(median_diff)), max(abs(median_diff))), # symmetric interval
node_color_range = c("cyan", "gray", "magenta"), # diverging colors
node_size_axis_label = "OTU count",
node_color_axis_label = "Log 2 ratio of median counts",
layout = "davidson-harel", initial_layout = "reingold-tilford",
title = paste(unique(treatment_1) ,"vs", unique(treatment_2)))
### Metabarcoding R1 vs Metagenomic R1
meta_g_b_decond_table_rare%>%
make.tax.tree.comp(method = c("R1_B","R1_G")) -> obj3
## Summing per-taxon counts from 158 columns for 745 taxa
## Summing per-taxon counts from 158 columns in 2 groups for 745 taxa
obj3%>%
filter_taxa(taxon_ranks == "o", supertaxa = TRUE)%>%
heat_tree(node_label = gsub(pattern = "\\[|\\]", replacement = "", taxon_names),
node_size = R1_B+R1_G,
node_color = R1_B-R1_G,
node_color_range = c("blue", "gray", "red"),
node_color_interval = c(-max(abs(R1_B - R1_G)), max(abs(R1_B - R1_G))),
node_color_axis_label = "Standardized abudance \n difference across site (R1_B-R1_G)",
node_size_axis_label = "OTU abudance sum per taxon\n (standard hellinger)",
layout = "davidson-harel", initial_layout = "reingold-tilford")
obj3%>%
filter_taxa(taxon_ranks == "o", supertaxa = TRUE)%>%
heat_tree(node_label = taxon_names,
node_size = n_obs, # number of OTUs
node_color = log2_median_ratio, # difference between groups
node_color_interval = c(-10,10), # symmetric interval
node_color_range = c("cyan", "gray", "magenta"), # diverging colors
node_size_axis_label = "OTU count",
node_color_axis_label = "Log 2 ratio of median counts",
layout = "davidson-harel", initial_layout = "reingold-tilford",
title = paste(unique(treatment_1) ,"vs", unique(treatment_2)))
obj3 %>%
mutate_obs("cleaned_names", gsub(taxon_names, pattern = "\\[|\\]", replacement = "")) %>%
metacoder::filter_taxa(grepl(cleaned_names, pattern = "^[a-zA-Z]+$")) %>%
heat_tree(node_label = cleaned_names,
node_size = n_obs, # number of OTUs
node_color = log2_median_ratio, # difference between groups
node_color_interval = c(-10, 10), # symmetric interval
node_color_range = c("cyan", "gray", "magenta"), # diverging colors
node_size_axis_label = "OTU count",
node_color_axis_label = "Log 2 ratio of median counts",
layout = "da", initial_layout = "re", # good layout for large trees
title = paste(unique(treatment_1) ,"vs", unique(treatment_2)))
## Adding a new "character" vector of length 745.
obj3%>%
filter_taxa(taxon_ranks == "o", supertaxa = TRUE)%>%
heat_tree(node_label = taxon_names,
node_size = n_obs, # number of OTUs
node_color = median_diff, # difference between groups
node_color_interval = c(-max(abs(median_diff)), max(abs(median_diff))), # symmetric interval
node_color_range = c("cyan", "gray", "magenta"), # diverging colors
node_size_axis_label = "OTU count",
node_color_axis_label = "Log 2 ratio of median counts",
layout = "davidson-harel", initial_layout = "reingold-tilford",
title = paste(unique(treatment_1) ,"vs", unique(treatment_2)))
meta_g_b_decond_table_rare%>%
make.tax.tree.comp(method = c("R2_B","R2_G")) -> obj4
## Summing per-taxon counts from 158 columns for 651 taxa
## Summing per-taxon counts from 158 columns in 2 groups for 651 taxa
obj4%>%
filter_taxa(taxon_ranks == "o", supertaxa = TRUE)%>%
heat_tree(node_label = gsub(pattern = "\\[|\\]", replacement = "", taxon_names),
node_size = R2_B+R2_G,
node_color = R2_B-R2_G,
node_color_range = c("blue", "gray", "red"),
node_color_interval = c(-max(abs(R2_B-R2_G)), max(abs(R2_B-R2_G))),
node_color_axis_label = "Standardized abudance \n difference across site (R2_B-R2_G)",
node_size_axis_label = "OTU abudance sum per taxon\n (standard hellinger)",
layout = "davidson-harel", initial_layout = "reingold-tilford")
#### Better version with statistical significance
obj4%>%
filter_taxa(taxon_ranks == "o", supertaxa = TRUE)%>%
heat_tree(node_label = taxon_names,
node_size = n_obs, # number of OTUs
node_color = log2_median_ratio, # difference between groups
node_color_interval = c(-10, 10), # symmetric interval
node_color_range = c("cyan", "gray", "magenta"), # diverging colors
node_size_axis_label = "OTU count",
node_color_axis_label = "Log 2 ratio of median counts",
layout = "davidson-harel", initial_layout = "reingold-tilford",
title = paste(unique(treatment_1) ,"vs", unique(treatment_2)))
obj4 %>%
mutate_obs("cleaned_names", gsub(taxon_names, pattern = "\\[|\\]", replacement = "")) %>%
metacoder::filter_taxa(grepl(cleaned_names, pattern = "^[a-zA-Z]+$")) %>%
heat_tree(node_label = cleaned_names,
node_size = n_obs, # number of OTUs
node_color = log2_median_ratio, # difference between groups
node_color_interval = c(-10, 10), # symmetric interval
node_color_range = c("cyan", "gray", "magenta"), # diverging colors
node_size_axis_label = "OTU count",
node_color_axis_label = "Log 2 ratio of median counts",
layout = "da", initial_layout = "re", # good layout for large trees
title = paste(unique(treatment_1) ,"vs", unique(treatment_2)))
## Adding a new "character" vector of length 651.
obj4%>%
filter_taxa(taxon_ranks == "o", supertaxa = TRUE)%>%
heat_tree(node_label = taxon_names,
node_size = n_obs, # number of OTUs
node_color = median_diff, # difference between groups
node_color_interval = c(-max(abs(median_diff)), max(abs(median_diff))), # symmetric interval
node_color_range = c("cyan", "gray", "magenta"), # diverging colors
node_size_axis_label = "OTU count",
node_color_axis_label = "Log 2 ratio of median counts",
layout = "davidson-harel", initial_layout = "reingold-tilford",
title = paste(unique(treatment_1) ,"vs", unique(treatment_2)))
meta_g_b_decond_table_rare%>%
pivot_longer(starts_with("BU"), names_to = "ID", values_to = "abundance")%>%
mutate(method = str_extract(ID, "R\\d+_[BG]"),
soil_code = str_extract(ID, "^BU_[A-Z]+_\\d+"))%>%
group_by(ID)%>%
mutate(relative_abudance = abundance/sum(abundance))%>%
ggplot()+
facet_wrap(~soil_code)+
geom_col(aes(method,relative_abudance ,fill = order))+
main_theme+
theme(legend.position="none",
axis.text.x =element_text(angle = 90,vjust = 1, hjust = 1, size = 6, face="italic"))
meta_g_b_decond_table_rare%>%
pivot_longer(starts_with("BU"), names_to = "ID", values_to = "abundance")%>%
mutate(method = str_extract(ID, "R\\d+_[BG]"),
soil_code = str_extract(ID, "^BU_[A-Z]+_\\d+"))%>%
group_by(ID)%>%
mutate(relative_abudance = abundance/sum(abundance))%>%
ungroup()%>%
group_by(order, method)%>%
summarise(relative_abudance_mean = mean(relative_abudance))%>%
ggplot()+
#facet_wrap(~order)+
geom_raster(aes(method,order, fill = relative_abudance_mean))+
main_theme+
theme(legend.position="none",
axis.text.x =element_text(angle = 90,vjust = 1, hjust = 1, size = 6, face="italic"))
## `summarise()` has grouped output by 'order'. You can override using the
## `.groups` argument.